library(magrittr)
library(plyr)
library(tidyverse)
library(readxl)


rm(list=ls())
home = 'C:/Users/Jason/Dropbox/VNA_Responsiveness/Analysis/JOP-dataverse/'


papi = paste0(home, 'papi-education-formatted-2017.xlsx') %>%
  read_xlsx
pci  = paste0(home, 'pci-education-formatted-2017.xlsx') %>%
  read_xlsx
provinces = papi$province


# Left panel
combined = data.frame(PAPI=100-rowMeans(subset(papi, 
                                               select=-c(province,
                                                         header))),
                      PCI=100-rowMeans(subset(pci, 
                                              select=-c(province,
                                                        header))),
                      province=papi$province)


averages = combined %>%
  ggplot(aes(x=PAPI, y=PCI)) +
  geom_point() +
  geom_abline(slope=1, intercept=0, linetype=2, color='red', size=1) +
  geom_text(aes(label=province), 
            size=3, vjust=1) +
  labs(x='100% - Average of PAPI statistics (% Satisfied)', 
       y='100% - Average of PCI statistics (% Satisfied)') +
  coord_fixed(ratio=1, xlim=c(0, 100), ylim=c(0, 100), expand=F) +
  theme_minimal() +
  theme(panel.border=element_rect(fill=NA, color='black'))
averages


# Right panel
papi = papi %>%
  subset(select=-c(province,
                   header)) %>%
  prcomp(scale.=T)
pci = pci %>%
  subset(select=-c(province,
                   header)) %>%
  prcomp(scale.=T)
combined = data.frame(PAPI=papi$x[,'PC1'], 
                      PCI=pci$x[,'PC1'], 
                      province=provinces) %>%
  mutate(PAPI=PAPI*(-1), 
         PCI=PCI*(-1))


pca = combined %>%
  ggplot(aes(x=PAPI, y=PCI)) +
  geom_point() +
  geom_abline(slope=1, intercept=0, linetype=2, color='red', size=1) +
  geom_vline(xintercept=0, linetype=2, color='black') +
  geom_hline(yintercept=0, linetype=2, color='black') +
  geom_text(aes(label=province), 
            size=3, vjust=1) +
  labs(x='First principal component, PAPI statistics', 
       y='First principal component, PCI statistics') +
  scale_x_continuous(breaks=seq(-4, 4, 1)) +
  scale_y_continuous(breaks=seq(-4, 4, 1)) +
  coord_fixed(ratio=1, 
              xlim=c(-max(abs(range(c(combined$PAPI, combined$PCI)))), 
                     max(abs(range(c(combined$PAPI, combined$PCI))))),
              ylim=c(-max(abs(range(c(combined$PAPI, combined$PCI)))), 
                     max(abs(range(c(combined$PAPI, combined$PCI)))))) +
  theme_minimal() +
  theme(panel.border=element_rect(fill=NA, color='black'))
pca


fig_6_1 = grid.arrange(averages, pca, nrow=1)


ggsave('appendix-figure-06-1.png', fig_6_1, path=home, width=16, height=8, units='in')
ggsave('appendix-figure-06-1.tiff', fig_6_1, path=home, width=16, height=8, units='in')
